package com.shujia.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates the RDD `sample` transformation: reads a text file as an RDD of lines,
 * draws a random sample from it, and prints the sampled lines.
 */
object Demo5Sample {
  /**
   * Entry point. Builds a Spark context with master "local", samples the lines of
   * `spark/data/students.csv`, and prints each sampled line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Set up the Spark environment.
    val conf = new SparkConf()
    conf.setMaster("local")
    // NOTE(review): the app name "map" looks copied from another demo — confirm whether
    // it should be renamed; kept as-is to avoid changing what shows up in the Spark UI/logs.
    conf.setAppName("map")

    val sc = new SparkContext(conf)

    // Always stop the context, even when the job fails, so its resources are released.
    try {
      // Read the input file; each element of the RDD is one line of text.
      val linesRDD: RDD[String] = sc.textFile("spark/data/students.csv")

      // sample: draw a random sample with replacement. The fraction is an expectation,
      // not an exact size, and no seed is given, so the output differs between runs.
      val sampleRDD: RDD[String] = linesRDD.sample(withReplacement = true, fraction = 0.1)

      sampleRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }

}
